#Alexander F. Gazmararian
#afg2@princeton.edu
#January 9, 2024

#Purpose: Process POWER grant data for subsequent analyses.

#Load packages
library(tidyverse)
library(tidylog)
library(here)
library(readxl)
library(janitor)

#Load data
g <- read_xlsx(here("data", "input", "power", "POWER with locations through FY22.xlsx"))

#Clean names
g <- clean_names(g)
#Basic descriptive stats
summary(g$funds_total)
sum(g$funds_total) #794 million over the program's life
#Figure H6
plot.power <- g %>%
  group_by(fiscal_year) %>%
  summarise(n = n()) %>%
  ggplot(aes(x = fiscal_year, y = n)) +
  geom_vline(xintercept = 2008, color = "red") +
  geom_col() +
  scale_x_continuous(limits = c(2000, 2022)) +
  scale_y_continuous(expand = c(0,0), limits = c(0, 110)) +
  theme_bw(base_size = 13) +
  theme(panel.grid = element_blank()) +
  labs(x = "Year", y = "POWER Grants")
ggsave(
  plot.power,
  filename = here("output", "figures", "si_fig_H6_powergrants.png"),
  width = 6.5,
  height = 2.9,
  scale = 1.5,
  dpi = 300
)

#Expand county names into a list
g <- separate(
  data = g,
  col = county,
  into = paste0("x", seq(1, 500, 1)),
  sep = ","
)
#Pivot to longer so each row is a state-county-project
g <- g %>% pivot_longer(cols = c(x1:x500))
#Trim NAs
g <- subset(g, !is.na(value))
#Fix names
g <- g %>% rename(county = value)
#Fix names
g$county <- gsub("\r\n", ",", g$county)
g <- separate(
  data = g,
  col = county,
  into = paste0("x", seq(1, 3, 1)),
  sep = ","
)
g$name <- NULL
g <- g %>% pivot_longer(cols = c(x1:x3))
#Drop column
g$name <- NULL
g <- g %>% rename(county = value)
#Split observations with multiple states
g <- separate(
  data = g,
  col = state,
  into = paste0("x", seq(1, 50, 1)),
  sep = ","
)
#Pivot to longer so each row is a county-state-project
g <- g %>% pivot_longer(cols = c(x1:x50), values_to = "state")
#Drop meaningless column
g$name <- NULL
#Subset to only observations with states
g <- subset(g, !is.na(state))
g <- subset(g, state != "NA")
g <- subset(g, county != "")

#Add ARC counties
arc_counties <- read_xlsx(here("data", "input", "arc", "Appalachian-Counties-Served-by-ARC_2021.xlsx"), skip = 4)
names(arc_counties) <- tolower(names(arc_counties))
arc_counties$fips <- NULL
arc_counties$state <- state.abb[match(arc_counties$state, state.name)]
arc_counties <- arc_counties %>%
  rename(
    state_arc = state,
    state_county = county
  )
arc_counties$county <- "All ARC Counties"
#Merge with POWER data
g <- left_join(g, arc_counties, by = "county")
#Coalesce columns
g <- g %>%
  mutate(
    county = case_when(
      county == "All ARC Counties" ~ state_county,
      T ~ county
    ),
    state = case_when(
      county == "All ARC Counties" ~ state_arc,
      T ~ state
    )
  )
g <- subset(g, select = -c(state_arc, state_county))
# Remove cities appended to county name
g$county <- sub("\\s*\\+.*", "", g$county)
#Load county and fips data
fips <- read.csv(here("data", "input", "eiagenerator", "state_and_county_fips_master.txt"))
fips <- subset(fips, !is.na(state))
fips$name <- gsub(" County| Parish", "", fips$name)
fips <- fips %>% rename(county = name)
# Prep county names---remove punct
g$county <- tolower(gsub("[[:punct:]]", "", g$county))
#Fix st. clair
g <- g %>% mutate(county = ifelse(county == "stclair", "st clair", county))
fips$county <- tolower(gsub("[[:punct:]]", "", fips$county))
#Merge
g.fips <- left_join(g, fips, by = c("county", "state"))
#Inspect rows only in X
subset(g.fips, is.na(fips), select = c(county, state))
table(subset(g.fips, is.na(fips), select = c(county, state))$county)
#Subset observations without FIPS codes
g.fips <- subset(g.fips, !is.na(fips))
#Aggregate
g.agg <- g.fips %>%
  group_by(fiscal_year, fips) %>%
  summarise(
    funds_arc = sum(funds_arc),
    funds_total = sum(funds_total),
    grants = n()
  )
#Create cumulative variables
g.agg <- g.agg %>%
  group_by(fips) %>%
  arrange(fiscal_year) %>%
  mutate(
    funds_arc_cum = cumsum(funds_arc),
    funds_total_cum = cumsum(funds_total),
    grants_cum = cumsum(grants)
  )
#Rename variable
g.agg <- g.agg %>% rename(year = fiscal_year)
#Save output
saveRDS(g.agg, here("data", "inter", "power_grants.rds"))
